# Fetch the page source.
# Extract the useful information from it with regular expressions (re).
import requests
import re
import csv

# Download the page at `url`, extract each movie's title, year, score and
# rating count with a regular expression, and write them to a CSV file.
# Only this single URL is requested; no pagination is performed.
url = "https://movie.douban.com/top250"
headers = {
    # Send a desktop-browser User-Agent with the request.
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
}

# Parse the data: one match per <li> item, with named groups for the fields.
# re.S lets ".*?" span line breaks inside an item's markup.
obj = re.compile(r' <li>.*?<div class="item">.*?<span class="title">(?P<title>.*?)</span>.*?<br>(?P<year>.*?)&nbsp'
                 r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
                 r'.*?<span>(?P<num>.*?)人评价</span>',re.S)

# The timeout keeps the script from hanging forever on a stalled connection;
# the context manager releases the connection even if an error is raised.
with requests.get(url, headers=headers, timeout=10) as response:
    # Fail loudly on an HTTP error instead of silently writing an empty CSV.
    response.raise_for_status()
    page_content = response.text

# Column headers, keyed by the regex group they describe (same order as the
# named groups in the pattern, which is the order groupdict() yields).
title_dic = {
    'title': '名称',
    'year': '年份',
    'score': '评分',
    'num': '评价人数'
}

# Save as a CSV file. newline="" is required by the csv module so that it
# controls line endings itself (otherwise Windows output gets blank rows);
# the with-block guarantees the file is flushed and closed.
with open("douban_top250.csv", mode="w", encoding="utf-8", newline="") as f:
    csvWriter = csv.writer(f)
    csvWriter.writerow(title_dic.values())
    for item in obj.finditer(page_content):
        dic = item.groupdict()
        # The year capture is surrounded by whitespace in the page markup.
        dic['year'] = dic['year'].strip()
        csvWriter.writerow(dic.values())
print("over")

